Introduction

This document describes an estimate of the positive predictive value (PPV) of variant discovery for all variants in the gene KCNH2 on long QT syndrome type 2 (LQT2). Additional details on the methods used are published Kroncke et al. 2020 PLOS Genetics and at the following website: (https://oates.app.vumc.org/vancart/SCN5A/SCN5A-report.html). We use observed and estimated LQT2 penetrance for all known KCNH2 variants as a way to assess the per variant PPV for variant discovery. Our objective is to develope a prior estimate of the per variant PPV on LQT2 which incorporates structure, function, and in silico predictors. We use these in silico and in vitro data to generate a Bayesian prior estimate of the per variant PPV since these data can be generated in a lab setting, unlike heterozygotes/carriers of KCNH2 variants which may or may not exist. The final posterior estimate combines this derived prior and clinically phenotyped heterozygotes/carriers.

Part 1: Calculate LQT2 Penetrance and LQT2 Penetrance Density using Various Subsets of the Literature and Cohort Data

All Literature Variants

# mut_type has includes type and isoform for all variants in the literature and in the cohort.
mut_type <- mut_type[mut_type$mut_type == "missense",]

# Cohort carrier/heterozygote counts and variant IDs. 
# Here we select only the missense variants (in-frame indels are also included as "missense")
cohort.data <- cohort.data[cohort.data$mut_type == "missense" & cohort.data$total_carriers > 0,]

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.nonoverlap.data[lit.nonoverlap.data$mut_type == "missense",]

# Here, heterozygotes/carriers from gnomAD are removed to test their influence on performance
# Remove comments in next two lines and complete subsequent evaluation to assess influence of gnomAD on
# calculations.
#d$total_carriers <- d$total_carriers - d$gnomAD # test influence of gnomAD
#d$unaff <- d$total_carriers - d$lqt2 # test influence of gnomAD

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" when evaluating ROC-AUC of n=1 variants from the literature

LQT2 empirical Penetrance prior

Use observed LQT2 penetrance to calculate “penetrance density” as described in previous publication. Plot penetrance density versus residue

# Mean squared error
mse <- function(sm) {
  mean((sm$residuals)^2*(sm$weights))
}

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2329 -0.1653 -0.0232  0.0763  0.7561 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2332     0.0135   17.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2323 on 789 degrees of freedom
##   (467 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model)#p*(1-p)

# Derive alpha and beta from weighted mean and MSE (estimated variance)
estBetaParams <- function(mu, var) {
  alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2
  beta <- alpha * (1 / mu - 1)
  return(params = list(alpha = alpha, beta = beta))
}

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.54041284982238   beta0 =  1.77720039413882"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

# Plot literature observed LQT2 penetrance versus residue number
m<- d %>% 
  select(resnum, pmean = penetrance_lqt2) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(d[,"penetrance_lqt2"]~as.numeric(d[,"resnum"]), span = 0.15)
plot(d$resnum, d$penetrance_lqt2, xlab ="Residue", ylab = "LQT2 Penetrance Estimate")
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

Calculate penetrance densities

With the updated empirical priors applied to carrier counts, calculate “penetrance density” as described in previous publication.

# NOTE: adjust 3rd argument given to funcdist, "d,", to d[d$total_carriers>1,] when 
# evaluating ROC of total_carriers == 1

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
mut_type <- mut_type[mut_type$mut_type == "missense" & mut_type$isoform == "A",]
for(rec in 1:nrow(mut_type)){
  if (!is.na(mut_type[rec, "resnum"])) {
    l<-length(h2.covariates[h2.covariates$var == mut_type[rec, "var"],"var"])
    for (m in 1:l){
    h2.covariates[h2.covariates$var == mut_type[rec, "var"], 
                  c("lqt2_dist", "lqt2_dist_weight", "resnum")][m,] <- c(funcdist(mut_type[rec, "resnum"], mut_type[rec, "var"], d, h2dist, "penetrance_lqt2", "sigmoid", 7), mut_type[rec, "resnum"])
    }
  }
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]
m<- h2.covariates %>% 
  select(resnum, pmean = lqt2_dist) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(h2.covariates[,"lqt2_dist"]~as.numeric(h2.covariates[,"resnum"]), span = 0.15)
plot(h2.covariates$resnum, h2.covariates$lqt2_dist, xlab ="Residue", ylab = "LQT2 Penetrance Density", xlim=c(0,1160))
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

Calculate Weighted Spearman Correlation Coefficients

Evaluate weighted Spearman correlations coefficients between observed LQT2 penetrance in the literature and various potential predictors

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so we can estimate LQT2 penetrance for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

calcPval=function(xName,yName,weightName,nPerms,new.mat2){
  # Pulls out variables

  x=new.mat2[,xName] 
  y=new.mat2[,yName] 
  w=new.mat2[,weightName]
  x2=x[!is.na(x)]
  y2=y[!is.na(x)]
  w2=w[!is.na(x)]

  # Calculate the real correlation
  realCorr=weightedCorr(x2,y2,method='spearman',weights=w2)
  # Do permutations, calculate fake correlations
  permutedCorrList=c()
  for(permNum in 1:nPerms){
    permutedX=sample(x2,length(x2),replace=FALSE)
    wCorrSim=weightedCorr(permutedX,y2,method='spearman',weights=w2)
    permutedCorrList=c(permutedCorrList,wCorrSim)
  }
  permutedCorrList2=abs(permutedCorrList)
  realCorr2=abs(realCorr)
  
  # Calculate pvalue
  summ=sum(realCorr2<permutedCorrList2)
  pValue=summ/nPerms
  return(list(realCorr,pValue,length(x2)))
}

calcAllPvals=function(yList,xList,nPerms,weightName,new.mat2){
  i=0
  resultTable=data.frame()
  for(yName in yList){
    for(xName in xList){
      i=i+1
      result=calcPval(xName,yName,weightName,nPerms,new.mat2)
      resultTable[i,'x']=xName
      resultTable[i,'y']=yName
      resultTable[i,'nPerms']=nPerms
      resultTable[i,'weightedCorr']=result[[1]]
      resultTable[i,'pValue']=result[[2]]
      resultTable[i,'n']=result[[3]]
      #print(resultTable[i,'pValue'])
    }
  }
  print(resultTable)
  return(resultTable)
}


yList=c('penetrance_lqt2')
xList=c('hm_ssPeak','hm_tailPeak','hm_vhalfact','hm_vhalfinact','hm_recovfrominact', 'hm_taudeact_fast', 
        'ht_ssPeak','ht_tailPeak','ht_vhalfact','ht_vhalfinact','ht_recovfrominact', 'ht_taudeact_fast',
        'pph2_prob', 'provean_score', "blast_pssm",
        'pamscore', 'aasimilaritymat', "lqt2_dist", "revel_score")
tmp<-d[!is.na(d$penetrance_lqt2) & !is.na(d$revel_score),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue   n
## 1          hm_ssPeak penetrance_lqt2   1000  0.319565106  0.271  20
## 2        hm_tailPeak penetrance_lqt2   1000 -0.564758635  0.000 100
## 3        hm_vhalfact penetrance_lqt2   1000  0.147612664  0.293  68
## 4      hm_vhalfinact penetrance_lqt2   1000  0.669666858  0.000  29
## 5  hm_recovfrominact penetrance_lqt2   1000 -0.734693865  0.016  12
## 6   hm_taudeact_fast penetrance_lqt2   1000 -0.375296727  0.028  42
## 7          ht_ssPeak penetrance_lqt2   1000 -0.464626144  0.021  34
## 8        ht_tailPeak penetrance_lqt2   1000 -0.616133062  0.000  83
## 9        ht_vhalfact penetrance_lqt2   1000 -0.061932733  0.709  41
## 10     ht_vhalfinact penetrance_lqt2   1000 -0.173738913  0.426  26
## 11 ht_recovfrominact penetrance_lqt2   1000 -0.407537689  0.416   6
## 12  ht_taudeact_fast penetrance_lqt2   1000  0.009703229  0.964  14
## 13         pph2_prob penetrance_lqt2   1000  0.392919452  0.000 741
## 14     provean_score penetrance_lqt2   1000 -0.585053878  0.000 741
## 15        blast_pssm penetrance_lqt2   1000 -0.250452543  0.000 741
## 16          pamscore penetrance_lqt2   1000 -0.116758403  0.016 750
## 17   aasimilaritymat penetrance_lqt2   1000  0.018401459  0.715 750
## 18         lqt2_dist penetrance_lqt2   1000  0.583417065  0.000 748
## 19       revel_score penetrance_lqt2   1000  0.664409462  0.000 750
tmp<-d[!is.na(d$ht_tailPeak) & !is.na(d$penetrance_lqt2) & !is.na(d$revel_score),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue  n
## 1          hm_ssPeak penetrance_lqt2   1000  0.690504615  0.071 10
## 2        hm_tailPeak penetrance_lqt2   1000 -0.428867090  0.003 66
## 3        hm_vhalfact penetrance_lqt2   1000 -0.093938286  0.672 35
## 4      hm_vhalfinact penetrance_lqt2   1000  0.629956576  0.050 15
## 5  hm_recovfrominact penetrance_lqt2   1000 -0.772690799  0.259  4
## 6   hm_taudeact_fast penetrance_lqt2   1000 -0.228864954  0.368 17
## 7          ht_ssPeak penetrance_lqt2   1000 -0.464626144  0.016 34
## 8        ht_tailPeak penetrance_lqt2   1000 -0.616133062  0.000 83
## 9        ht_vhalfact penetrance_lqt2   1000 -0.061096836  0.755 40
## 10     ht_vhalfinact penetrance_lqt2   1000 -0.173348992  0.461 25
## 11 ht_recovfrominact penetrance_lqt2   1000 -0.407537689  0.415  6
## 12  ht_taudeact_fast penetrance_lqt2   1000  0.009703229  0.969 14
## 13         pph2_prob penetrance_lqt2   1000  0.332107233  0.009 81
## 14     provean_score penetrance_lqt2   1000 -0.455185289  0.000 81
## 15        blast_pssm penetrance_lqt2   1000  0.055910704  0.674 81
## 16          pamscore penetrance_lqt2   1000  0.058560727  0.622 83
## 17   aasimilaritymat penetrance_lqt2   1000  0.063049344  0.611 83
## 18         lqt2_dist penetrance_lqt2   1000  0.755839461  0.000 83
## 19       revel_score penetrance_lqt2   1000  0.612183017  0.000 83

Scale all covariates

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 penetrance)

Literature Variants Where N = 1 Variants are removed

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.nonoverlap.data[lit.nonoverlap.data$mut_type == "missense",]

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 2,"weight"] <- 0.000 # This is changed to "< 2" here to evaluate ROC-AUC of n=1 variants from the literature

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" when evaluating ROC-AUC of n=1 variants from the literature

LQT2 empirical Penetrance prior

Use observed LQT2 penetrance to calculate “penetrance density” as described in previous publication. Plot penetrance density versus residue

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.2329 -0.1653 -0.0232  0.0763  0.7561 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.2332     0.0135   17.27   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2323 on 789 degrees of freedom
##   (467 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model) #p*(1-p)

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.54041284982238   beta0 =  1.77720039413882"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

Calculate penetrance densities

With the updated empirical priors applied to carrier counts, calculate “penetrance density” as described in previous publication.

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
mut_type <- mut_type[mut_type$mut_type == "missense" & mut_type$isoform == "A",]
for(rec in 1:nrow(mut_type)){
  if (!is.na(mut_type[rec, "resnum"])) {
    h2.covariates[h2.covariates$var == mut_type[rec, "var"], 
                  c("lqt2_dist", "lqt2_dist_weight", "resnum")] <- c(funcdist(mut_type[rec, "resnum"], mut_type[rec, "var"], d[d$total_carriers>1,], h2dist, "penetrance_lqt2", "sigmoid", 7), mut_type[rec, "resnum"])
  }
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so  we can estimate LQT2 penetrance for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

Scale all covariates

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 penetrance)

Literature and Cohort Combined (for final predictions)

# Literature dataset where potentially overlapping carriers/heterozygotes are removed
d <- lit.cohort.data[lit.cohort.data$mut_type == "missense",]

# add all possible variants
allvariants<-data.frame(unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"var"]), stringsAsFactors = F)
names(allvariants)<-"var"
allvariants$isoform<-unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"isoform"])
allvariants$mut_type<-unlist(h2.covariates[!is.na(h2.covariates$cardiacboost),"mut_type"])
a<-merge(d,allvariants,all = T)
a[is.na(a$total_carriers),"total_carriers"] <- 0
a[is.na(a$lqt2),"lqt2"] <- 0
d<-a

# set initial weighting and penetrance
d$weight = 1-1/(0.01+d$total_carriers)
d$penetrance_lqt2 <- d$lqt2/d$total_carriers
d[d$total_carriers < 1,"weight"] <- 0.000 # This is changed to "< 2" here to evaluate ROC-AUC of n=1 variants from the literature

LQT2 empirical Penetrance prior

Use observed LQT2 penetrance to calculate “penetrance density” as described in previous publication. Plot penetrance density versus residue

# Mean squared error
mse <- function(sm) {
  mean((sm$residuals)^2*(sm$weights))
}

# Derive alpha and beta from weighted mean and MSE (estimated variance)
estBetaParams <- function(mu, var) {
  alpha <- ((1 - mu) / var - 1 / mu) * mu ^ 2
  beta <- alpha * (1 / mu - 1)
  return(params = list(alpha = alpha, beta = beta))
}

# Weighted mean to determine LQT2 penetrance empirical prior
newdata = data.frame(wt=1)
model <- lm(penetrance_lqt2 ~ 1, data=d, weights = d$weight)
summary(model)
## 
## Call:
## lm(formula = penetrance_lqt2 ~ 1, data = d, weights = d$weight)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.3424 -0.2429 -0.0341  0.0654  0.6315 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.34273    0.01253   27.35   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2541 on 1017 degrees of freedom
##   (6240 observations deleted due to missingness)
p<-predict(model, newdata)
dev<- mse(model) #p*(1-p)

# Estimated shape parameters for LQT2 empirical prior
alpha0 = estBetaParams(p,dev)$alpha
beta0 = estBetaParams(p,dev)$beta
print(paste("alpha0 = ", alpha0, "  beta0 = ", beta0))
## [1] "alpha0 =  0.854290338628253   beta0 =  1.63833103141397"
# Bayesian LQT2 penetrance estimates from empirical priors 
# and observed affected/unaffected counts:
d$lqt2_penetranceBayesian_initial <- (alpha0 + d[,"lqt2"])/((alpha0 + beta0 + d[,"total_carriers"]))
d$lqt2_penetranceBayesian<-d$lqt2_penetranceBayesian_initial

Calculate penetrance densities and annotate function and structural location

With the updated empirical priors applied to carrier counts, calculate “penetrance density” as described in previous publication. !!! NOTE: since these data are truly the “best estimates” we include all variants in the calculation such that unique scores are by residue not by variant.

h2.covariates[, "lqt2_dist"]<-NA
h2.covariates[, "lqt2_dist_weight"]<-NA
ld<-0
for(rec in seq(2,1159,1)){
  #print(rec)
  ld <- funcdist(rec, "var", d[!is.na(d$total_carriers) & d$total_carriers>0 & d$isoform == "A" & d$mut_type != "nonsense",], h2dist, "penetrance_lqt2", "sigmoid", 7)
  h2.covariates[!is.na(h2.covariates$isoform) & !is.na(h2.covariates$resnum) & h2.covariates$isoform=="A" & h2.covariates$resnum == rec & !is.na(h2.covariates$var) & !is.na(h2.covariates$mut_type) & h2.covariates$mut_type == "missense","lqt2_dist"] <- ld[1]
  h2.covariates[!is.na(h2.covariates$isoform) & !is.na(h2.covariates$resnum) & h2.covariates$isoform=="A" & h2.covariates$resnum == rec & !is.na(h2.covariates$var)& !is.na(h2.covariates$mut_type) & h2.covariates$mut_type == "missense","lqt2_dist_weight"] <-ld[2] 
}

# Plot lqt2_dist versus residue number
h2.covariates$resnum<-as.integer(h2.covariates$resnum)
h2.covariates <- h2.covariates[order(h2.covariates$resnum),]
h2.covariates <- h2.covariates[!is.na(h2.covariates$resnum),]

# Merge "d" with full variant list and set carrier counts to 0. 
# This is done for convenience so  we can estimate LQT2 penetrance for all variants including 
# those witheld during model construction. This will make validation easier.
d <- merge(d, h2.covariates, all = TRUE)
d <- unique(d) 

# annotate structural location (hotspot) 
d$Structure<-NA
d[!is.na(d$lqt2_dist) & d$lqt2_dist<0.1,"Structure"]<-"Non_Hotspot"
d[!is.na(d$lqt2_dist) & d$lqt2_dist>=0.1 & d$lqt2_dist<0.4,"Structure"]<-"Mild_Hotspot"
d[!is.na(d$lqt2_dist) & d$lqt2_dist>=0.4,"Structure"]<-"Hotspot"

# annotate functional perturbation
d$Function<-NA
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak<0.25,"Function"]<-"Severe Dominant LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak<0.5 & d$ht_tailPeak>=0.25,"Function"]<-"Dominant LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=0.5 & d$ht_tailPeak<0.75,"Function"]<-"LOF"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=0.75 & d$ht_tailPeak<1.25,"Function"]<-"Normal"
d[!is.na(d$ht_tailPeak) & d$ht_tailPeak>=1.25,"Function"]<-"GOF"

Calculate EM priors and posteriors for all variants

Use an EM algorithm to estimate LQT2 PPV for each variant (or LQT2 penetrance)

LQT2 penetrance in Cohort Data for validation

load("prepared_data.RData")
load("lit_all_data_checkpoint.RData")

cohort.data$weight = 1-1/(0.01+cohort.data$total_carriers)
cohort.data$penetrance_lqt2 <- cohort.data$lqt2/cohort.data$total_carriers

for (variant in cohort.data$var) {
  if (!is.na(match(variant, d$var))) {
     cohort.data[cohort.data$var == variant, c("p_mean_w","alpha", "beta", "lqt2_lit", "unaff_lit", "total_carriers_lit", "lqt2_dist")] <- d[match(variant, d$var), c("p_mean_w", "alpha", "beta", "lqt2", "unaff", "total_carriers", "lqt2_dist")]
  }
}

m<- cohort.data %>% 
  select(resnum, pmean = penetrance_lqt2) %>% 
  mutate(p_mean_smooth = rollmean(pmean, k=20, fill = NA))

fit <- loess(cohort.data[,"penetrance_lqt2"]~as.numeric(cohort.data[,"resnum"]), span = 0.15)
plot(cohort.data$resnum, cohort.data$penetrance_lqt2, xlab ="Residue", ylab = "LQT2 Observed Penetrance", col = "red", pch=19)
xrange <- seq(min(fit$x), max(fit$x), length.out = 100)
ps <- predict(fit, xrange, se=T)
lines(xrange, ps$fit*1, lwd=5)
lines(xrange, (ps$fit+1.96*ps$se.fit)*1, lty=2, lwd=4)
lines(xrange, (ps$fit-1.96*ps$se.fit)*1, lty=2, lwd=4)

# Add covariates to cohort dataset
cohort.data <- merge(cohort.data, h2.covariates, all = TRUE)
cohort.data <- unique(cohort.data) 


# Save cohort data
save(cohort.data, file = "cohort_checkpoint.RData")

Part 2: Coverage plots

Bootstrap and get the coverage rate

  1. Use the observed penetrance from as the TRUE penetrance, generate n binomial observations

  2. Use the final EM algorithm posterior as the prior for Beta-Binomial, incorporate data from step (1), generate the posterior distribution, and get 95% credible interval.

  3. Check whether the interval cover the true penetrance from Step 1.

  4. Repeat Step 1 to Step 3 N times to get the coverage rate.

Bootstrap function

BootsCoverage <- function(var,n=100,N=1000,true){
  
  # var: variant name
  # n: number of subjects in the new data
  # N: number of Bootstrap

  # extract the "true" penetrance
  true.p <- d[d$var==var,true]
  
  # generate binomial data 
  event <- rbinom(N,n,true.p)

  # get the posterior credible interval
  alpha <- d$alpha[which(d$var==var)] 
  beta <- d$beta[which(d$var==var)] 

  new.alpha <- alpha + event
  new.beta <- beta + n - event

  lb <- qbeta(0.025,new.alpha,new.beta)
  ub <- qbeta(0.975,new.alpha,new.beta)

  # change lb to floor of nearest 0.1
  lb <- floor(lb*20)/20
  ub <- ceiling(ub*20)/20
  
  return(sum(lb < true.p & ub > true.p)/N) 
}

Plot coverage

Observed penetrance as the “true” penetrance

The coverage plot where observed penetrance is the “true” penetrance and one hundred new observations are added is shown below.

Part 3: Variance explained

Pearson R^2 and Spearman Rho Against EM Posterior from Cohort

# load data, literature + gnomAD + cohort
load("lit_all_data_checkpoint.RData")
load("cohort_checkpoint.RData")

calcPval=function(xName,yName,weightName,nPerms,new.mat2){
  # Pulls out variables

  x=new.mat2[,xName] 
  y=new.mat2[,yName] 
  w=new.mat2[,weightName]
  x2=x[!is.na(x)]
  y2=y[!is.na(x)]
  w2=w[!is.na(x)]

  # Calculate the real correlation
  realCorr=weightedCorr(x2,y2,method='spearman',weights=w2)
  # Do permutations, calculate fake correlations
  permutedCorrList=c()
  for(permNum in 1:nPerms){
    permutedX=sample(x2,length(x2),replace=FALSE)
    wCorrSim=weightedCorr(permutedX,y2,method='spearman',weights=w2)
    permutedCorrList=c(permutedCorrList,wCorrSim)
  }
  permutedCorrList2=abs(permutedCorrList)
  realCorr2=abs(realCorr)
  
  # Calculate pvalue
  summ=sum(realCorr2<permutedCorrList2)
  pValue=summ/nPerms
  return(list(realCorr,pValue,length(x2)))
}

calcAllPvals=function(yList,xList,nPerms,weightName,new.mat2){
  i=0
  resultTable=data.frame()
  for(yName in yList){
    for(xName in xList){
      i=i+1
      result=calcPval(xName,yName,weightName,nPerms,new.mat2)
      resultTable[i,'x']=xName
      resultTable[i,'y']=yName
      resultTable[i,'nPerms']=nPerms
      resultTable[i,'weightedCorr']=result[[1]]
      resultTable[i,'pValue']=result[[2]]
      resultTable[i,'n']=result[[3]]
      #print(resultTable[i,'pValue'])
    }
  }
  print(resultTable)
  return(resultTable)
}

# Select covariates from isoform "A" in cohort dataset
cohort.data <- cohort.data[!is.na(cohort.data$total_carriers) & cohort.data$isoform == "A" & cohort.data$mut_type == "missense",]

tmp<-cohort.data[!is.na(cohort.data$provean_score) & !is.na(cohort.data$revel_score) & !is.na(cohort.data$pamscore),]
yList=c('penetrance_lqt2')
xList=c('hm_ssPeak','hm_tailPeak','hm_vhalfact','hm_vhalfinact','hm_recovfrominact', 'hm_taudeact_fast', 
        'ht_ssPeak','ht_tailPeak','ht_vhalfact','ht_vhalfinact','ht_recovfrominact', 'ht_taudeact_fast',
        'pph2_prob', 'provean_score', "blast_pssm",
        'pamscore', 'aasimilaritymat', "lqt2_dist", "revel_score", 'p_mean_w')
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue   n
## 1          hm_ssPeak penetrance_lqt2   1000  -0.13782555  0.758   8
## 2        hm_tailPeak penetrance_lqt2   1000  -0.45322142  0.007  46
## 3        hm_vhalfact penetrance_lqt2   1000   0.24454237  0.262  35
## 4      hm_vhalfinact penetrance_lqt2   1000   0.58824513  0.120  13
## 5  hm_recovfrominact penetrance_lqt2   1000  -0.01793348  0.972   8
## 6   hm_taudeact_fast penetrance_lqt2   1000   0.09551958  0.735  26
## 7          ht_ssPeak penetrance_lqt2   1000  -0.59432412  0.078  11
## 8        ht_tailPeak penetrance_lqt2   1000  -0.72598217  0.000  39
## 9        ht_vhalfact penetrance_lqt2   1000   0.04702470  0.814  22
## 10     ht_vhalfinact penetrance_lqt2   1000  -0.51551414  0.082  14
## 11 ht_recovfrominact penetrance_lqt2   1000   0.31291231  0.496   4
## 12  ht_taudeact_fast penetrance_lqt2   1000   0.40856426  0.189  13
## 13         pph2_prob penetrance_lqt2   1000   0.29038377  0.000 268
## 14     provean_score penetrance_lqt2   1000  -0.38127676  0.000 268
## 15        blast_pssm penetrance_lqt2   1000  -0.07798665  0.284 268
## 16          pamscore penetrance_lqt2   1000  -0.05859981  0.443 268
## 17   aasimilaritymat penetrance_lqt2   1000   0.12912560  0.079 268
## 18         lqt2_dist penetrance_lqt2   1000   0.46761571  0.000 268
## 19       revel_score penetrance_lqt2   1000   0.45089391  0.000 268
## 20          p_mean_w penetrance_lqt2   1000   0.51861171  0.000 268
rm(tmp)
rm(t)
i=0
tmp<-data.frame()
for (x in xList){
  i=i+2
  tmp[i-1,"Feature"]<-x
  t<-d[!is.na(d[,x]) & d$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  tmp[i,"Feature"]<-paste(x,"_cohort")
  t<-d[!is.na(d[,x]) & d$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  foo <- boot(t, function(data,indices)
  weightedCorr(t[indices,x],t$penetrance_lqt2[indices], method="spearman", weights = t$weight[indices]), R=1000)
  tmp[i-1,"Spearman"]<-foo$t0
  tmp[i-1,"Spearman_low"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[1][[1]]
  tmp[i-1,"Spearman_high"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[2][[1]]
  tmp[i-1,"n"]<-length(t[,x])
  t<-cohort.data[!is.na(cohort.data[,x]) & cohort.data$total_carriers>0,]
  t<-t[!is.na(t[,"var"]),]
  foo <- boot(t, function(data,indices)
  weightedCorr(t[indices,x],t$penetrance_lqt2[indices], method="spearman", weights = t$weight[indices]), R=1000)
  tmp[i,"Spearman"]<-foo$t0
  tmp[i,"Spearman_low"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[1][[1]]
  tmp[i,"Spearman_high"]<-quantile(foo$t,c(0.025,0.975), na.rm = T)[2][[1]]
  tmp[i,"n"]<-length(t[,x])
}

forestplot(tmp$Feature,tmp$Spearman,tmp$Spearman_low,tmp$Spearman_high)

# reset "tmp" variable
tmp<-cohort.data[!is.na(cohort.data$provean_score) & !is.na(cohort.data$revel_score) & !is.na(cohort.data$pamscore),]
# Weighted R2 between observed LQT2 penetrance and post-test probability
foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 penetrance versus observed cohort LQT2 penetrance")
## [1] "EM estimated LQT2 penetrance versus observed cohort LQT2 penetrance"
foo$t0
## [1] 0.3102379
quantile(foo$t,c(0.025,0.975))
##     2.5%    97.5% 
## 0.206992 0.422704
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 penetrance density versus observed cohort LQT2 penetrance")
## [1] "LQT2 penetrance density versus observed cohort LQT2 penetrance"
foo$t0
## [1] 0.2386498
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1462149 0.3424715
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL score versus observed cohort LQT2 penetrance")
## [1] "REVEL score versus observed cohort LQT2 penetrance"
foo$t0
## [1] 0.2200169
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1158602 0.3224962
# Evaluate only variants with Heterozygous peak tail current measured.
tmp<-cohort.data[!is.na(cohort.data$ht_tailPeak),]
resultTable<-calcAllPvals(yList, xList, 1000, 'weight', tmp)
##                    x               y nPerms weightedCorr pValue  n
## 1          hm_ssPeak penetrance_lqt2   1000   0.97286492  0.298  4
## 2        hm_tailPeak penetrance_lqt2   1000  -0.25115090  0.254 28
## 3        hm_vhalfact penetrance_lqt2   1000   0.10800601  0.736 15
## 4      hm_vhalfinact penetrance_lqt2   1000  -0.58787950  0.318  6
## 5  hm_recovfrominact penetrance_lqt2   1000  -1.00000000  0.000  2
## 6   hm_taudeact_fast penetrance_lqt2   1000   0.15426767  0.811  6
## 7          ht_ssPeak penetrance_lqt2   1000  -0.66768740  0.028 12
## 8        ht_tailPeak penetrance_lqt2   1000  -0.73814391  0.000 40
## 9        ht_vhalfact penetrance_lqt2   1000   0.22559402  0.358 21
## 10     ht_vhalfinact penetrance_lqt2   1000  -0.57071390  0.042 15
## 11 ht_recovfrominact penetrance_lqt2   1000   0.31291231  0.494  4
## 12  ht_taudeact_fast penetrance_lqt2   1000   0.55832008  0.092 11
## 13         pph2_prob penetrance_lqt2   1000   0.29181628  0.115 39
## 14     provean_score penetrance_lqt2   1000  -0.31546899  0.069 39
## 15        blast_pssm penetrance_lqt2   1000   0.20233110  0.306 39
## 16          pamscore penetrance_lqt2   1000  -0.02844874  0.865 40
## 17   aasimilaritymat penetrance_lqt2   1000   0.28888518  0.102 40
## 18         lqt2_dist penetrance_lqt2   1000   0.67282397  0.000 40
## 19       revel_score penetrance_lqt2   1000   0.72321081  0.000 40
## 20          p_mean_w penetrance_lqt2   1000   0.76322146  0.000 40
foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 penetrance versus observed cohort LQT2 penetrance")
## [1] "EM estimated LQT2 penetrance versus observed cohort LQT2 penetrance"
foo$t0
## [1] 0.5669383
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3054698 0.7637507
model <- lm(penetrance_lqt2~ht_tailPeak, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("Heterozygously measured peak tail current versus observed cohort LQT2 penetrance")
## [1] "Heterozygously measured peak tail current versus observed cohort LQT2 penetrance"
foo$t0
## [1] 0.4873147
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2938440 0.6757608
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 penetrance density versus observed cohort LQT2 penetrance")
## [1] "LQT2 penetrance density versus observed cohort LQT2 penetrance"
foo$t0
## [1] 0.4168252
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1648801 0.6448965
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL score versus observed cohort LQT2 penetrance")
## [1] "REVEL score versus observed cohort LQT2 penetrance"
foo$t0
## [1] 0.4662318
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2561232 0.7058627

Variance explained from literature dataset

load("lit_all_data_checkpoint.RData")

tmp<-d[!is.na(d$provean_score) & !is.na(d$penetrance_lqt2) & !is.na(d$lqt2_dist) & !is.na(d$revel_score),]

foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 penetrance versus observed literature LQT2 penetrance")
## [1] "EM estimated LQT2 penetrance versus observed literature LQT2 penetrance"
foo$t0
## [1] 0.8168311
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.7669018 0.8573807
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 penetrance density versus observed literature LQT2 penetrance")
## [1] "LQT2 penetrance density versus observed literature LQT2 penetrance"
foo$t0
## [1] 0.4902899
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3930717 0.5845911
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL versus observed literature LQT2 penetrance")
## [1] "REVEL versus observed literature LQT2 penetrance"
foo$t0
## [1] 0.3920345
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.3292966 0.4586301
# Evaluate only variants with Heterozygous peak tail current measured.
tmp<-d[!is.na(d$ht_tailPeak) & !is.na(d$provean_score) & !is.na(d$penetrance_lqt2) & !is.nan(d$penetrance_lqt2) & !is.na(d$lqt2_dist),]

foo <- boot(tmp, function(data,indices)
  weightedCorr(tmp$p_mean_w[indices],tmp$penetrance_lqt2[indices], method="pearson", weights = tmp$weight[indices])^2, R=1000)
print("EM estimated LQT2 penetrance versus observed literature LQT2 penetrance")
## [1] "EM estimated LQT2 penetrance versus observed literature LQT2 penetrance"
foo$t0
## [1] 0.8884971
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.8039869 0.9451064
model <- lm(penetrance_lqt2~lqt2_dist, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("LQT2 penetrance density versus observed literature LQT2 penetrance")
## [1] "LQT2 penetrance density versus observed literature LQT2 penetrance"
foo$t0
## [1] 0.5945224
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.4178803 0.7492548
model <- lm(penetrance_lqt2~ht_tailPeak, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("Heterozygous peak tail current versus observed literature LQT2 penetrance")
## [1] "Heterozygous peak tail current versus observed literature LQT2 penetrance"
foo$t0
## [1] 0.3390998
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.1722167 0.5589989
model <- lm(penetrance_lqt2~revel_score, data = tmp, weights = weight)
mod<-data.frame(model$fitted.values,model$model$penetrance_lqt2,model$weights)
foo <- boot(mod, function(data,indices)
  weightedCorr(mod$model.fitted.values[indices],mod$model.model.penetrance_lqt2[indices], method="pearson", weights = mod$model.weights[indices])^2, R=1000)
print("REVEL versus observed literature LQT2 penetrance")
## [1] "REVEL versus observed literature LQT2 penetrance"
foo$t0
## [1] 0.4286368
quantile(foo$t,c(0.025,0.975))
##      2.5%     97.5% 
## 0.2452794 0.6179631

Part 4: ROC and AUC plots

ROCs of observed cohort LQT2 penetrance with 0.5 as cutoff all variants.

AUCs from ROCs observed cohort LQT2 penetrance at multiple cutoffs

AUCs from ROCs predicting EM posteriors

## [1] "276   231"
## [1] "276   229"
## [1] "276   226"
## [1] "276   224"
## [1] "276   217"
## [1] "276   205"
## [1] "276   205"
## [1] "276   202"
## [1] "276   202"
## [1] "276   171"
## [1] "276   169"
## [1] "276   164"
## [1] "276   144"
## [1] "276   144"
## [1] "276   136"

ROCs of observed cohort LQT2 penetrance with single observation variants.

ROCs of observed literature LQT2 penetrance with 0.5 as cutoff.

AUCs from ROCs observed literature LQT2 penetrance at multiple cutoffs

## [1] "741   234"
## [1] "741   231"
## [1] "741   229"
## [1] "741   223"
## [1] "741   218"
## [1] "741   211"
## [1] "741   208"
## [1] "741   205"
## [1] "741   205"
## [1] "741   193"
## [1] "741   192"
## [1] "741   190"
## [1] "741   183"
## [1] "741   179"
## [1] "741   174"

ROC and AUC for N = 1 variants from the literature.

AUC for N = 1 variants from the literature.

## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"
## [1] "362   99"

Part 4: Forest plots

The code to produce the forest plots is shown in the code chunk.

rm(d)

# Loading combined literature, gnomAD, and cohort dataset 
load("lit_all_data_checkpoint.RData")
d<-d[d$total_carriers>0 & d$mut_type!="nonsense" & d$isoform=="A",]
d<-d[!is.na(d$var),]

mean.post <- (d$alpha + d$lqt2)/(d$alpha+d$beta+d$total_carriers)
mean.prior <- (d$alpha)/(d$alpha+d$beta)

lower.prior <- qbeta(0.025,d$alpha,d$beta)
higher.prior <- qbeta(0.975,d$alpha,d$beta) 

lower.post <- qbeta(0.025,d$alpha+d$lqt2,d$beta+d$total_carriers-d$lqt2)
higher.post <- qbeta(0.975,d$alpha+d$lqt2,d$beta+d$total_carriers-d$lqt2) 

forest.data.post <- data.frame(variant = d$var, mean=mean.post,
                          lower=lower.post, higher=higher.post, resnum=d$resnum, tc=d$total_carriers, lqt2=d$lqt2)
forest.data.post$group<-"posterior"

forest.data.prior <- data.frame(variant = d$var, mean=mean.prior,
                          lower=lower.prior, higher=higher.prior, resnum=d$resnum, tc=d$total_carriers, lqt2=d$lqt2)
forest.data.prior$group<-"prior"

forest.data<-rbind(forest.data.post, forest.data.prior)
forest.data<-forest.data[order(forest.data$resnum, forest.data$variant),]
forest.data$label<-""
forest.data[forest.data$group=="posterior","label"]<-paste(forest.data[forest.data$group=="posterior","lqt2"], "/", forest.data[forest.data$group=="posterior","tc"])

#define colours for dots and bars
dotCOLS = c("#866D4B","#000000")
barCOLS = c("#FFFFFF","#FFFFFF")

plotg <- function(a,b){
  fd<-forest.data[a:b,]
  png( paste("Plots/", a, "-",b,"pics.png",sep=""),res=300,height=10,width=10,units="in")
  p<-ggplot(fd, aes(x=reorder(variant,-resnum), y=mean, ymin=lower, ymax=higher, col=group, fill=group)) + 
  geom_text(data=fd, aes(x=reorder(variant,-resnum), label=label)) +
#specify position here
  geom_linerange(size=2,position=position_dodge(width = 1)) +
  geom_hline(yintercept=1, lty=1) +
  geom_hline(yintercept=0, lty=1) +
#specify position here too
  geom_point(size=2, shape=21, colour="white", stroke = 0.5,position=position_dodge(width = 1)) +
  scale_fill_manual(values=barCOLS)+
  scale_color_manual(values=dotCOLS)+
  scale_y_continuous(name="LQT2 Diagnosis Probability", limits = c(0, 1)) +
  coord_flip() +
  theme_minimal()
  print(p)
  dev.off() 
}

sapply(0:30*50+1,function(x) plotg(x,x+49) )

The forests plots are pasted below, where gold bars are posteriors, and black bars are EM priors. Variant name is given to the left and the number of LQT2 affected / total heterozygote count is given between the mean prior and mean posterior.